import os
from math import log10

# Denominator used when a cumulative password count is turned into a fraction.
# NOTE(review): presumably 13 target sets x one million passwords each -- confirm.
N = 13 * 1e6

# Target data set names; combined with a training set name to form the keys
# of the path dictionaries ('<training>_<target>').
TARGET_DATA_ALL_SETS = [
    '7K7K_100w_20210806',
    '178_100w_20210806',
    'Dodonew_100w_20210806',
    'Tianya_100w_20210806',
    '17173_100w_20210806',
    'Myheritage_100w_20210806',
    'Twitter_100w_20210806',
    'Rockyou_100w_20210806',
    'Webhost_100w_20210806',
    '8Fit_100w_20210806',
    'DE3_100w_20210806',
    'FR1_100w_20210806',
    'FR5_100w_20210806',
]

# Training data sets that are currently enabled.  An earlier, longer selection
# is kept here for reference:
#   ['CSDN_100w_20210806', 'GMAIL_100w_20210806', 'EYEEM_100w_20210806', 'FR0_100w_20210806']
TRAINING_DATA_SETS = [
    'CSDN_100w_20210806',
    'EYEEM_100w_20210806',
]

# Directory that contains one 'FLA_MonteCarlo_<TRAINING>_Baseline' folder per
# training set.
FLA_MC_RAW_FILE_ROOT = os.path.join('..', '..', 'CommonFiles', 'SemanticMidFiles', 'MonteCarloEvaluation', 'Baseline')

# Directory that contains the sampled target password files.
_FLA_SAMPLE_FILE_ROOT = os.path.join('..', '..', 'CommonFiles', 'TargetPasswordSets', 'Random_100w_FLA')

# Training set short names, in the order their entries are inserted into the
# two dictionaries below.
_FLA_TRAINING_NAMES = ('CSDN', 'GMAIL', 'EYEEM', 'FR0')

# (target short name used in the key, prefix of the sample file name).
# The prefix differs from the short name only for DE3, FR1 and FR5.
_FLA_SAMPLE_FILE_PREFIXES = (
    ('178', '178'),
    ('17173', '17173'),
    ('Tianya', 'Tianya'),
    ('7K7K', '7K7K'),
    ('Dodonew', 'Dodonew'),
    ('Myheritage', 'Myheritage'),
    ('Rockyou', 'Rockyou'),
    ('Twitter', 'Twitter'),
    ('Webhost', 'Webhost'),
    ('8Fit', '8Fit'),
    ('DE3', 'DE_UKN3'),
    ('FR1', 'FR_UKN_FR_1'),
    ('FR5', 'FR_UKN_FR_5'),
)

# Target short names for the Monte Carlo result files; the file name uses the
# lower-cased short name ('raw_<name>_data.txt').
_FLA_MC_TARGET_NAMES = (
    '7K7K', '178', 'Dodonew', 'Tianya', '17173',
    'Myheritage', 'Twitter', 'Rockyou', 'Webhost',
    '8Fit', 'DE3',
    'FR1', 'FR5',
)

# '<training>_<target>' -> path of the sampled target password file.
# The path depends on the target only; it is repeated for every training set.
FLA_RAW_SAMPLE_FILE_DICT = {
    '%s_100w_20210806_%s_100w_20210806' % (train, target):
        os.path.join(_FLA_SAMPLE_FILE_ROOT, '%s_ONLY_PWDS_NO_Trimmed_100w.txt' % prefix)
    for train in _FLA_TRAINING_NAMES
    for target, prefix in _FLA_SAMPLE_FILE_PREFIXES
}

# '<training>_<target>' -> path of the raw Monte Carlo result file.
FLA_MC_RAW_FILE_DICT = {
    '%s_100w_20210806_%s_100w_20210806' % (train, target):
        os.path.join(FLA_MC_RAW_FILE_ROOT,
                     'FLA_MonteCarlo_%s_Baseline' % train,
                     'raw_%s_data.txt' % target.lower())
    for train in _FLA_TRAINING_NAMES
    for target in _FLA_MC_TARGET_NAMES
}

def Format_raw_data_for_drawing(raw_path, formatted_path, sample_file_path, total_password_count=None):
    """Convert a raw Monte Carlo result file into a cumulative table for plotting.

    The sample file is read first to count how often each password occurs
    (one password per line).  The raw file is then processed line by line.
    A usable raw line is tab-separated with the password in the first field
    and its guess number in the third field.  For each usable line one output
    line is written::

        count<TAB>fraction<TAB>guess_number<TAB>log10(guess_number)<TAB>password

    ``count`` is the running sum of the sample frequencies of all passwords
    written so far and ``fraction`` is ``count / total_password_count``.

    Args:
        raw_path: Path of the raw result file to read (UTF-8).
        formatted_path: Path of the table to write (UTF-8, overwritten).
        sample_file_path: Path of the sample password file (UTF-8).
        total_password_count: Denominator for ``fraction``.  Defaults to the
            module-level ``N`` when omitted, which is the original behaviour.

    Raises:
        SystemExit: With status 1, after printing a message, when a password
            of the raw file does not occur in the sample file.  The output
            file then contains only the lines written up to that point.
    """
    if total_password_count is None:
        total_password_count = N

    # Frequency of every password in the sample file.
    password_frequency = {}
    with open(sample_file_path, 'r', encoding='utf-8') as sample_file:
        for line in sample_file:
            password = line.strip('\n')
            password_frequency[password] = password_frequency.get(password, 0) + 1

    # The input is opened first so that a missing raw file does not leave a
    # freshly truncated output file behind.
    with open(raw_path, 'r', encoding='utf-8') as raw_file:
        with open(formatted_path, 'w', encoding='utf-8') as formatted_file:
            cumulative_count = 0
            for line in raw_file:
                fields = line.strip('\n').split('\t')
                # The guess number lives in the third field, so at least
                # three fields are required; shorter lines are skipped.
                if len(fields) < 3:
                    continue

                password = fields[0]
                guess_number = float(fields[2])
                if guess_number == 0:
                    # log10(0) is undefined; treat a zero guess number as 1.
                    guess_number = 1

                if password not in password_frequency:
                    print('%s is not in sample file.' % password)
                    # Same effect as the interactive helper exit(1), without
                    # depending on the site module.
                    raise SystemExit(1)

                cumulative_count += password_frequency[password]
                percentage = cumulative_count / total_password_count
                log10_guess_number = log10(guess_number)
                formatted_file.write('%d\t%f\t%f\t%f\t%s\n' % (
                    cumulative_count, percentage, guess_number, log10_guess_number, password))
    
def main():
    """Convert the raw result file of every configured training/target pair.

    Each pair of TRAINING_DATA_SETS x TARGET_DATA_ALL_SETS is turned into the
    key '<training>_<target>'.  Pairs without an entry in both
    FLA_MC_RAW_FILE_DICT and FLA_RAW_SAMPLE_FILE_DICT are skipped.  The output
    path is the raw path with 'raw_' replaced by 'fla_'.
    """
    for training_name in TRAINING_DATA_SETS:
        for target_name in TARGET_DATA_ALL_SETS:
            pair_key = '%s_%s' % (training_name, target_name)

            # Both a raw result file and a sample file must be configured.
            if pair_key not in FLA_MC_RAW_FILE_DICT or pair_key not in FLA_RAW_SAMPLE_FILE_DICT:
                continue

            raw_file = FLA_MC_RAW_FILE_DICT[pair_key]
            formatted_file = raw_file.replace('raw_', 'fla_')
            sample_file = FLA_RAW_SAMPLE_FILE_DICT[pair_key]

            # Progress message ("processing [ <path> ]").
            print('正在处理[ %s ]' % raw_file)
            Format_raw_data_for_drawing(raw_file, formatted_file, sample_file)




if __name__ == '__main__':
    # Script entry point: converts one hard-coded GMAIL result file.
    #
    # Alternatives that were used before and are currently disabled:
    #   * a single pair, with the paths looked up under the key
    #     'CSDN_100w_20210806_7K7K_100w_20210806' in FLA_MC_RAW_FILE_DICT and
    #     FLA_RAW_SAMPLE_FILE_DICT (output path = raw path with 'raw_'
    #     replaced by 'fla_');
    #   * every configured pair, by calling main().
    gmail_all_sample_file = r'D:\PythonProjects\CommonFiles\TargetPasswordSets\Random_100w_FLA\all_target_datasets.txt'
    gmail_all_raw_file = r'D:\PythonProjects\CommonFiles\SemanticMidFiles\MonteCarloEvaluation\Baseline\FLA_MonteCarlo_GMAIL_Baseline\raw_gmail_all_data.txt'
    gmail_all_formatted_file = r'D:\PythonProjects\CommonFiles\SemanticMidFiles\MonteCarloEvaluation\Baseline\FLA_MonteCarlo_GMAIL_Baseline\fla_gmail_all_data.txt'

    Format_raw_data_for_drawing(
        raw_path=gmail_all_raw_file,
        formatted_path=gmail_all_formatted_file,
        sample_file_path=gmail_all_sample_file,
    )